home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-02
/
kruse_11.zip
/
11SOL.EXT
< prev
next >
Wrap
Text File
|
1990-12-01
|
18KB
|
582 lines
{Section 11.3 Phase 1: Splitting the Text into Words}
{Exercise E6}
const
nfiles = 8; {number of temporary files for unprocessed words}
type
filecode = 1..nfiles;
var
RefFile: array[filecode] of fileref;
{local files used for auxiliary storage of words from phase 1 to phase 2:
Normally, a separate file exist for each initial letter; this version uses
nfiles files due to operating system constraints.}
procedure SplitWords;
var
outcount: array[filecode] of integer; {counter for word files}
code: filecode; {Into which file does word go?}
{The remainder of the local declarations are unchanged.}
begin {procedure SplitWords}
Initialize; {sets up files, hash table, constants}
GetWord(w); {Obtain a single word from InText.}
while not endinput do
begin
x := HashAddress(w);
if w <> hash[x] then
begin {Not in hash table; put into RefFile.}
code := FindFile( w[1] );
outcount[code] := outcount[code] + 1;
with RefFile[code]^ do {Update the storage file.}
begin
wd := w;
pg := pagecount
end;
Put(RefFile[code])
end;
GetWord(w)
end;
Conclude {writes word counts to output}
end; {procedure SplitWords}
function FindFile( ch: letter): filecode;
{Uses binary decision tree to select one of nfiles = 8 files depending
on the letter ch. }
begin {function FindFile}
if ch < 'M' then
if ch < 'E' then
if ch < 'C' then FindFile := 1
else FindFile := 2
else if ch < 'H' then FindFile := 3
else FindFile := 4
else if ch < 'S' then
if ch < 'P' then FindFile := 5
else FindFile := 6
else if ch < 'T' then FindFile := 7
else FindFile := 8
end; {function FindFile}
for ch := A to Z do
begin
rewrite( RefFile[ch] );
outcount[ch] := 0
end;
for i := 1 to nfiles do
begin
rewrite( RefFile[i] );
outcount[i] := 0
end;
procedure Conclude;
{Writes out counts of various word lists. For some systems, it is
necessary to close files, which should be done here.}
var
i: integer; {loop index}
begin {procedure Conclude}
writeln('The total number of words read in is ', wordcount:7);
writeln;
writeln('The number of words to process further in the next stage,');
writeln('in each temporary file, is below.');
writeln('a - b c - d e - g h - l m - o p - r s t - z');
for i := 1 to nfiles do
write(outcount[i]:7);
writeln;
writeln
end; {procedure Conclude}
{Section 11.4 Phase 2: Classifying the Words}
{Exercise E3}
procedure ClassifyWords;
{The references stored in the temporary files are placed into a list,
the words from the file InIndex are compared with the words in the list
as they are merged into the file NewIndex.}
type
wordtype = (hash, count, index); {ways to process a word}
pointref = ^reflist;
reflist = record {list of page references}
pg: integer;
next: pointref
end;
pointer = ^node;
node = record {node of list storing words}
wd: word;
kind: wordtype;
ct: integer;
ref: pointref;
next: pointer
end;
{Cannot use varying types as wordtype is not known upon first reading.}
list = record
head: pointer
end;
var
code: filecode; {index used to loop through temporary files}
NewList: list;
procedure Merge(p, q: pointer; var r: pointer);
{Merges two sorted lists into one, that will begin at r;
requires that both lists be nonempty. This version is modified
slightly from the version listed in the text due to a difference
in the data structures used.}
var
s: pointer; {always points to last node of sorted list}
begin {procedure Merge}
if (p = nil) or (q = nil) then
writeln('Merge called with empty list(s).');
if p^.wd <= q^.wd then {First find the head, r, of the merged list.}
begin {Note the change from .info.key to .wd. }
r := p;
p := p^.next
end
else begin
r := q;
q := q^.next
end;
s := r; {s always points to the last entry of the merged list.}
while (p <> nil) and (q <> nil) do
if p^.wd <= q^.wd then {Note the change from .info.key to .wd. }
begin
s^.next := p; {Attach the node with the smaller key to the sorted list.}
s := p;
p := p^.next {Advance to the next unmerged node.}
end
else begin
s^.next := q;
s := q;
q := q^.next
end;
if p = nil then {After one list is exhausted, attach the remainder of the other one.}
s^.next := q
else
s^.next := p
end; {procedure Merge}
{Include the procedures MergeSort and Divide from Chapter 7 here.}
procedure MainMergeSort(var L: list);
{ Main procedure to invoke recursive procedure MergeSort, as listed
in the text. }
begin
MergeSort(L.head)
end;
procedure InitializeList(var L: list);
begin
L.head := nil
end;
procedure Insert(x: reference; var L: list);
{ Inserts the reference into the hash table of references. }
var
done: Boolean;
p: pointer;
q: pointref;
begin {procedure Insert}
done := false;
p := L.head;
while (p <> nil) and (not done) do
begin
if p^.wd = x.wd then {The word is already in the list, update its node.}
begin
p^.ct := p^.ct + 1;
new(q);
q^.pg := x.pg;
q^.next := p^.ref;
p^.ref := q;
done := true
end
else
p := p^.next
end;
if not done then
begin {Insert a new entry if the word is not already in the table.}
p := nil;
new(p);
p^.wd := x.wd;
p^.ct := 1; {Initialize the count and the page references.}
new(q);
q^.pg := x.pg;
q^.next := nil;
p^.ref := q;
p^.next := L.head;
L.head := p
end
end; {procedure Insert}
procedure Append(p: pointer; var L: list);
{ Append the nodes pointed to by p to the end of the list L. }
var
q: pointer;
begin {procedure Append}
q := L.head;
if q = nil then {The list is empty, make p the beginning of the list.}
L.head := p
else begin
while q^.next <> nil do {Find the end of the list.}
q := q^.next;
q^.next := p
end
end; {procedure Append}
procedure Place(var F: fileref; var L: list);
{ Places the words in file F into the list of words. }
var
x: reference;
temp: pointer;
begin {procedure Place}
temp := L.head; {Save the words that have already been processed.}
L.head := nil;
reset(F);
while not eof(F) do {Insert all the words into the list.}
begin
x := F^;
get(F);
Insert(x, L)
end;
MainMergeSort(L); {Sort the list and append the other work to the list.}
Append(temp, L)
end; {procedure Place}
procedure RemoveFirst(var p: pointer; var L: list);
{ Removes the first node from the list L. }
begin {procedure RemoveFirst}
p := L.head;
if not Empty(L) then {standard list operation}
begin
L.head := L.head^.next;
p^.next := nil
end
end; {procedure RemoveFirst}
procedure ReadReference(var r: pointer; var F: text);
{ Reads reference from the file F. }
var
k: char;
begin {procedure ReadReference}
if eof(F) then
r := nil
else begin
ReadWord(F, r^.wd);
readln(F, k);
case k of
'F', 'f': r^.kind := hash;
'C', 'c': begin
r^.kind := count;
r^.ct := 0
end;
'I', 'i': begin
r^.kind := index;
r^.ref := nil
end
end
end
end; {procedure ReadReference}
procedure WriteReference(p: pointer; var NewIndex, NewHashFile: text);
{writes a word to the appropriate file with the appropriate associated information}
var
q: pointref;
begin {procedure WriteReference}
with p^ do
case kind of
hash: begin {Write the word to the hash file.}
WriteWord(NewHashFile, wd);
writeln(NewHashFile)
end;
count:begin {Write the word and its frequency to the new index file.}
WriteWord(NewIndex, wd);
write(NewIndex, 'c');
writeln(NewIndex, ct:5)
end;
index:begin {Write the word and its page numbers to the new index file.}
WriteWord(NewIndex, wd);
write(NewIndex, 'i');
q := ref;
while q <> nil do
begin
write(NewIndex, q^.pg:5);
q := q^.next
end;
writeln(NewIndex)
end
end
end; {procedure WriteReference}
procedure GetWordType(p: pointer);
{ Request the user to specify the category of the given word. }
var
response: char;
begin {procedure GetWordType}
with p^ do
begin
repeat
WriteWord(output, wd);
write(' is (F, C, I)?');
readln(response)
until response in ['F', 'f', 'C', 'c', 'I', 'i'];
case response of
'F', 'f': kind := hash;
'C', 'c': kind := count;
'I', 'i': kind := index
end
end
end; {procedure GetWordType}
procedure Delete(var p: pointer);
{ Delete the word p^ as well as all of the page references associated with it. }
var
q, r: pointref;
begin {procedure Delete}
if p^.kind = index then
begin
q := p^.ref;
while q <> nil do
begin {Dispose the list of page references associated with the word.}
r := q^.next;
dispose(q);
p^.ref := r;
q := r
end
end;
dispose(p) {dispose the node itself}
end; {procedure Delete}
procedure CompareAndMerge(var L: list; var InIndex, NewIndex, NewHashFile: text);
{ Compare the list L with InIndex, merge if was found. }
var
p, r: pointer;
begin {procedure CompareAndMerge}
RemoveFirst(p, L);
new(r);
ReadReference(r, InIndex);
while p <> nil do
if r = nil then
begin
GetWordType(p);
WriteReference(p, NewIndex, NewHashFile);
Delete(p); {Remove reference list and node from memory.}
RemoveFirst(p, L)
end
if p^.wd < r^.wd then
begin
GetWordType(p);
WriteReference(p, NewIndex, NewHashFile);
Delete(p); {Remove reference list and node from memory.}
RemoveFirst(p, L)
end
else if p^.wd > r^.wd then {Do not write a word that is not used to NewIndex.}
ReadReference(r, InIndex)
else begin {p^.wd = r^.wd}
p^.kind := r^.kind;
WriteReference(p, NewIndex, NewHashFile);
Delete(p); {Remove reference list and node from memory.}
RemoveFirst(p, L);
ReadReference(r, InIndex)
end
end; {procedure CompareAndMerge}
begin {procedure ClassifyWords}
reset(InIndex);
rewrite(NewIndex);
rewrite(NewHashFile);
InitializeList(NewList);
for code := nfiles downto 1 do {Place the words from each file into the list.}
Place(RefFile[code], NewList);
if not Empty(NewList) then {standard list operation}
CompareAndMerge(NewList, InIndex, NewIndex, NewHashFile)
end; {procedure ClassifyWords}
{Exercise E4}
procedure ClassifyWords;
{The references stored in the temporary files are placed in a new hash table,
the words from the file InIndex are compared with the words in the new table
as they are merged into the file NewIndex.}
const
RefTableSize = 3023; {Size of the hash table to temporarily store words.}
RefTableMax = 3022;
type
wordtype = (hash, count, index); {ways to process a word}
pointref = ^reflist;
reflist = record {list of page references}
pg: integer;
next: pointref
end;
pointer = ^node;
node = record {Node of list storing words.}
wd: word;
kind: wordtype;
ct: integer;
ref: pointref;
next: pointer
end;
{Cannot use varying types as wordtype is not known upon first reading.}
list = record
head: pointer
end;
RefHashTable = array[0..RefTableMax] of list;
var
code: filecode; {index used to loop through temporary files}
RefTable: RefHashTable; {stores all references in memory}
NewList: list;
procedure InitializeTable(var RefTable: RefHashTable);
var i: integer;
begin {procedure InitializeTable}
for i := 0 to RefTableMax do
RefTable[i].head := nil
end; {procedure InitializeTable}
function RefTableAddress(x: reference): integer;
{ Returns hashed address of reference. }
var
i, h: integer;
begin {function Hash}
h := 0;
with x do
for i := 1 to maxwd do
h := h + ord(wd[i]);
RefTableAddress := h mod RefTableSize
end; {function Hash}
procedure Insert(x: reference; pos: integer; var RefTable: RefHashTable);
{inserts the reference into the hash table of references}
var
done: Boolean;
p: pointer;
q: pointref;
begin {procedure Insert}
done := false;
p := RefTable[pos].head;
while (p <> nil) and (not done) do
begin {Search for the word, update the reference if it is found.}
if p^.wd = x.wd then begin
p^.ct := p^.ct + 1; {Update count and page reference.}
new(q);
q^.pg := x.pg;
q^.next := p^.ref;
p^.ref := q;
done := true
end
else
p := p^.next
end;
if not done then begin {Insert a new entry if the word is not in the table.}
p := nil;
new(p);
p^.wd := x.wd;
p^.ct := 1; {Initialize the count and the page references.}
new(q);
q^.pg := x.pg;
q^.next := nil;
p^.ref := q;
p^.next := RefTable[pos].head;
RefTable[pos].head := p
end
end; {procedure Insert}
procedure Place(var F: fileref; var RefTable: RefHashTable);
{places the words in file F into the reference table}
var
x: reference;
begin {procedure Place}
reset(F);
while not eof(F) do begin
x := F^;
get(F);
Insert(x, RefTableAddress(x), RefTable)
end
end; {procedure Place}
procedure LinkEntries(var RefTable: RefHashTable; var NewList: list);
{ The references in the hashed table are combined into the list NewList. }
var
i: integer;
p: pointer;
begin {procedure LinkEntries}
i := 0;
while (i < RefTableMax) and Empty(RefTable[i]) do {Find the first entry.}
i := i + 1;
if i <= RefTableMax then
begin
NewList.head := RefTable[i].head; {Initialize the list to point to the first entry.}
p := RefTable[i].head;
if p <> nil then {Find the end of the first chain.}
while p^.next <> nil do
p := p^.next;
while (i < RefTableMax) do {Link remaining entries into the list.}
begin
i := i + 1;
if not Empty(RefTable[i]) then {standard list procedure}
begin
p^.next := RefTable[i].head;
while p^.next <> nil do {Move p to the end of the chain.}
p := p^.next
end
end
end
else
NewList.head := nil
end; {procedure LinkEntries}
{See the previous exercise for the following procedures.}
procedure RemoveFirst(var p: pointer; var L: list);
procedure ReadReference(var r: pointer; var F: text);
procedure WriteReference(p: pointer; var NewIndex, NewHashFile: text);
procedure GetWordType(p: pointer);
procedure Delete(var p: pointer);
procedure CompareAndMerge(var L: list; var InIndex, NewIndex, NewHashFile: text);
procedure Merge(p, q: pointer; var r: pointer);
{Include the procedures MergeSort and Divide from Chapter 7 here.}
procedure MainMergeSort(var L: list);
begin {procedure ClassifyWords}
reset(InIndex);
rewrite(NewIndex);
rewrite(NewHashFile);
InitializeTable(RefTable);
for code := 1 to nfiles do {Place all the words into the reference hash table.}
Place(RefFile[code], RefTable);
LinkEntries(RefTable, NewList); {Link the entries of the table into a list.}
MainMergeSort(NewList);
if not Empty(NewList) then
CompareAndMerge(NewList, InIndex, NewIndex, NewHashFile)
end; {procedure ClassifyWords}